pp.mset <- phenomis::reading(ProMetIS::post_processed_dir.c(),
report.c = "none")[, ProMetIS::sets.vc()]
The number of sample and (annotated) features after the 2_post_preprocessed step are as follows:
pp_exprs_mn.ls <- MultiDataSet::as.list(pp.mset)
pp_dims.mn <- t(sapply(pp_exprs_mn.ls, dim))
pp_dims.mn <- cbind(pp_dims.mn,
Annotated = sapply(names(pp.mset),
function(set.c) {
fdata.df <- Biobase::fData(pp.mset[[set.c]])
if (grepl("(proteo|preclin)", set.c)) {
return(nrow(fdata.df))
} else {
return(sum(fdata.df[, "name"] != ""))
}
})[rownames(pp_dims.mn)])
pp_dims.mn <- pp_dims.mn[c("preclinical",
"proteomics_liver",
"metabolomics_liver_c18hypersil_pos",
"metabolomics_liver_hilic_neg",
"proteomics_plasma",
"metabolomics_plasma_c18hypersil_pos",
"metabolomics_plasma_hilic_neg",
"metabolomics_plasma_c18acquity_pos",
"metabolomics_plasma_c18acquity_neg"), ]
rownames(pp_dims.mn) <- c("preclinical",
"liver_proteomics",
"liver_metabo_c18hypersil_pos",
"liver_metabo_hilic_neg",
"plasma_proteomics",
"plasma_metabo_c18hypersil_pos",
"plasma_metabo_hilic_neg",
"plasma_metabo_c18acquity_pos",
"plasma_metabo_c18acquity_neg")
colnames(pp_dims.mn)[1:2] <- c("Features", "Samples")
pp_dims.mn <- pp_dims.mn[, c("Samples", "Features", "Annotated")]
knitr::kable(pp_dims.mn)
| Samples | Features | Annotated | |
|---|---|---|---|
| preclinical | 42 | 200 | 200 |
| liver_proteomics | 42 | 2187 | 2187 |
| liver_metabo_c18hypersil_pos | 42 | 5665 | 138 |
| liver_metabo_hilic_neg | 42 | 2866 | 199 |
| plasma_proteomics | 36 | 446 | 446 |
| plasma_metabo_c18hypersil_pos | 42 | 4788 | 113 |
| plasma_metabo_hilic_neg | 42 | 3131 | 191 |
| plasma_metabo_c18acquity_pos | 42 | 6104 | 78 |
| plasma_metabo_c18acquity_neg | 42 | 1584 | 49 |
pp_dims.ggplot <- phenomis::gg_barplot(pp_dims.mn,
bar_just.n = -0.2,
cex_bar.i = 5,
cex_axis.i = 14,
palette.vc = c(Samples = RColorBrewer::brewer.pal(3, "Set1")[2],
Features = RColorBrewer::brewer.pal(3, "Set1")[1],
Annotated = RColorBrewer::brewer.pal(3, "Set1")[3]),
figure.c = "none")
pp_dims.ggplot <- pp_dims.ggplot + ggplot2::scale_y_continuous(trans = "log10", limits = c(1, NA),
expand = ggplot2::expansion(add = c(0, 1)))
print(pp_dims.ggplot)
ggplot2::ggsave("figures/article_data/sets_dims_postprocessed.pdf", pp_dims.ggplot)
If the datasets are loaded directly from the 5_aggregated step, the number of features will slightly change because this time one specific genotype is selected (LAT and WT, or MX2 and WT; instead of the full LAT and MX2 and WT initial dataset) and the quality control filter will be reapplied to the selected subsets:
proportion of NA < 20% in at least one class
variance > 0 in all classes)
and for the proteomics datasets: imputation of less than 20% of the samples in at least one class
aggreg_mset.ls <- lapply(ProMetIS::genes.vc(),
function(gene.c) {
gene.mset <- phenomis::reading(file.path(ProMetIS::aggregated_dir.c(gene.c)),
report.c = "none")
gene.mset <- gene.mset[, ProMetIS::sets.vc()]
})
names(aggreg_mset.ls) <- ProMetIS::genes.vc()
aggreg_dims.mn <- NULL
for (gene.c in ProMetIS::genes.vc()) {
aggreg_gene.mset <- aggreg_mset.ls[[gene.c]]
aggreg_gene_dims.mn <- t(sapply(names(aggreg_gene.mset),
function(set.c) {
eset <- aggreg_gene.mset[[set.c]]
rev(dim(eset))
}))
colnames(aggreg_gene_dims.mn) <- paste0(gene.c, "_", colnames(aggreg_gene_dims.mn))
aggreg_dims.mn <- cbind(aggreg_dims.mn, aggreg_gene_dims.mn)
}
aggreg_dims.mn <- aggreg_dims.mn[c("preclinical",
"proteomics_liver",
"metabolomics_liver_c18hypersil_pos",
"metabolomics_liver_hilic_neg",
"proteomics_plasma",
"metabolomics_plasma_c18hypersil_pos",
"metabolomics_plasma_hilic_neg",
"metabolomics_plasma_c18acquity_pos",
"metabolomics_plasma_c18acquity_neg"), ]
rownames(aggreg_dims.mn) <- c("preclinical",
"liver_proteomics",
"liver_metabo_c18hypersil_pos",
"liver_metabo_hilic_neg",
"plasma_proteomics",
"plasma_metabo_c18hypersil_pos",
"plasma_metabo_hilic_neg",
"plasma_metabo_c18acquity_pos",
"plasma_metabo_c18acquity_neg")
aggreg_dims.ggplot <- phenomis::gg_barplot(aggreg_dims.mn,
bar_just.n = 0,
cex_bar.i = 5,
cex_axis.i = 14,
palette.vc = c(LAT_Samples = as.character(ProMetIS::palette.vc(light.l = TRUE)["LAT"]),
LAT_Features = as.character(ProMetIS::palette.vc()["LAT"]),
MX2_Samples = as.character(ProMetIS::palette.vc(light.l = TRUE)["MX2"]),
MX2_Features = as.character(ProMetIS::palette.vc()["MX2"])),
figure.c = "none")
aggreg_dims.ggplot <- aggreg_dims.ggplot + ggplot2::scale_y_continuous(trans = "log10", limits = c(1, NA),
expand = ggplot2::expansion(add = c(0, 1)))
print(aggreg_dims.ggplot)
ggplot2::ggsave("figures/article_data/sets_dims_aggregated.pdf", aggreg_dims.ggplot)
ppclin.eset <- pp.mset[["preclinical"]]
ppclin_pie.ggplot <- ProMetIS:::.preclin_pie(Biobase::fData(ppclin.eset), y.c = "category",
geom_text.ls = list(lab.i = 4, legend_text.i = 8, title.i = 18),
label.c = "value")
ggplot2::ggsave("figures/article_data/ImbertEtAl_Fig2_preclinical_categories.pdf", ppclin_pie.ggplot,
width = 7, height = 8)
For each dataset, numerical and graphical quality controls are presented.
The objective is to check that the WT lines from the ProMetIS study have similar phenotyping values compared to all other WT lines generated by PHENOMIN ICS.
# intensities of the WT mice in ProMetIS
ppclin_wt.eset <- ppclin.eset[, grep("W....", Biobase::sampleNames(ppclin.eset))]
ppclin_wt.mn <- Biobase::exprs(ppclin_wt.eset)
# transforming the values and computing the means by sex type
ppclin_trans.vc <- Biobase::fData(ppclin_wt.eset)[, "transformation"]
ppclin_wt.mn[ppclin_trans.vc == "inverse", ] <- 1/ppclin_wt.mn[ppclin_trans.vc == "inverse", ]
ppclin_wt.mn[ppclin_trans.vc == "log", ] <- log(ppclin_wt.mn[ppclin_trans.vc == "log", ])
ppclin_wt.mn[ppclin_trans.vc == "sqrt", ] <- sqrt(ppclin_wt.mn[ppclin_trans.vc == "sqrt", ])
ppclin_mean.mn <- t(apply(ppclin_wt.mn, 1,
function(feat.vn)
tapply(feat.vn, Biobase::pData(ppclin_wt.eset)[, "sex"],
function(x) mean(x, na.rm = TRUE))))
ics.eset <- phenomis::reading(file.path(ProMetIS::processed_dir.c(), "ics"), report.c = "none")
# restricting to the WT (C57BL6/N) mice not included in the ProMetIS project
ics_wt.eset <- ics.eset[, Biobase::pData(ics.eset)[, "gene"] == "WT" &
!(Biobase::pData(ics.eset)[, "impc_id"] %in% Biobase::pData(ppclin.eset)[, "impc_id"])]
ics_wt.mn <- Biobase::exprs(ics_wt.eset)
message("Number of all WT PHENOMIN WT mice except those from the ProMetIS project: ", ncol(ics_wt.eset))
message("Number of males and females:")
print(table(Biobase::pData(ics_wt.eset)[, "sex"]))
##
## F M
## 539 573
ics_trans.vc <- Biobase::fData(ics_wt.eset)[, "transformation"]
message("Percentage of variables with a normal distribution (possibly following a transformation): ",
round((1 - sum(ics_trans.vc == "non parametric") / length(ics_trans.vc)) * 100), "%")
ics_wt.mn[ics_trans.vc == "inverse", ] <- t(apply(ics_wt.mn[ics_trans.vc == "inverse", ], 1,
function(feat.vn) {
feat.vn[abs(feat.vn) < .Machine$double.eps] <- NA
1 / feat.vn
}))
ics_wt.mn[ics_trans.vc == "log", ] <- t(apply(ics_wt.mn[ics_trans.vc == "log", ], 1,
function(feat.vn) {
feat.vn[abs(feat.vn) < .Machine$double.eps] <- NA
log(feat.vn)
}))
ics_wt.mn[ics_trans.vc == "sqrt", ] <- sqrt(ics_wt.mn[ics_trans.vc == "sqrt", ])
ics_range.mn <- matrix(0, nrow = nrow(ics_wt.mn), ncol = 4,
dimnames = list(rownames(ics_wt.mn), c("F_inf", "F_sup", "M_inf", "M_sup")))
sex.vc <- Biobase::pData(ics_wt.eset)[, "sex"]
ics_range.mn[ics_trans.vc == "non parametric", ] <- t(apply(ics_wt.mn[ics_trans.vc == "non parametric", ], 1,
function(feat.vn) {
c(sapply(names(table(sex.vc)),
function(sex.c) {
quantile(feat.vn[sex.vc == sex.c],
c(0.05/2, 1 - 0.05/2),
na.rm = TRUE)
}))
}))
ics_range.mn[ics_trans.vc != "non parametric", ] <- t(apply(ics_wt.mn[ics_trans.vc != "non parametric", ], 1,
function(feat.vn) {
c(sapply(names(table(sex.vc)),
function(sex.c) {
feat_sex.vn <- feat.vn[sex.vc == sex.c]
mean(feat_sex.vn, na.rm = TRUE) + c(-1, 1) * 2 * sd(feat_sex.vn, na.rm = TRUE)
}))
}))
ppclin_mean.mi <- cbind(F = ics_range.mn[, "F_inf"] <= ppclin_mean.mn[, "F"] &
ppclin_mean.mn[, "F"] <= ics_range.mn[, "F_sup"],
M = ics_range.mn[, "M_inf"] <= ppclin_mean.mn[, "M"] &
ppclin_mean.mn[, "M"] <= ics_range.mn[, "M_sup"])
mode(ppclin_mean.mi) <- "numeric"
ppclin_pass.vl <- rowSums(ppclin_mean.mi) == 2
ppclin_unpass.mn <- cbind(F = ppclin_mean.mn[, "F"], ics_range.mn[, c("F_inf", "F_sup")],
M = ppclin_mean.mn[, "M"], ics_range.mn[, c("M_inf", "M_sup")])[which(is.na(ppclin_pass.vl) | !ppclin_pass.vl), , drop = FALSE]
print(ppclin_unpass.mn)
## F F_inf F_sup M M_inf
## Eye_Left.vitreous.humor.thickness..µm. 566 483.6572 573.9499 560.6667 504.8498
## M_sup
## Eye_Left.vitreous.humor.thickness..µm. 559.8606
# 'Gross Pathology_Body Weight (g)' contains to few values to be checked by the reference range
ppclin_pass.vl["Gross Pathology_Body Weight (g)"] <- TRUE
# 'Eye morphology (OCT + slit lamp)_Left vitreous humor thickness (µm)' is kept although the mean in males (560.6667) is slightly higher than the 2 * SD (559.86062)
ppclin_pass.vl["Eye morphology (OCT + slit lamp)_Left vitreous humor thickness (µm)"] <- TRUE
stopifnot(all(ppclin_pass.vl))
wt.eset <- ics.eset[, Biobase::pData(ics.eset)[, "gene"] == "WT"]
wt.pca <- ropls::opls(wt.eset, predI = 4)
## PCA
## 1127 samples x 200 variables
## standard scaling of predictors
## 31007 (14%) NAs
## R2X(cum) pre ort
## Total 0.279 4 0
A total of 1127 WT lines and 200 features are available.
Biobase::pData(wt.eset)[, "prometis"] <- factor(ifelse(Biobase::pData(wt.eset)[, "impc_id"] %in%
Biobase::pData(ppclin.eset)[, "impc_id"],
"ProMetIS", "Other PHENOMIN"),
levels = c("ProMetIS", "Other PHENOMIN"))
stopifnot(sum(Biobase::pData(wt.eset)[, "prometis"] == "ProMetIS") == 15)
The score plots in the first 4 dimensions are:
for (comp.i in 1:2)
ropls::plot(wt.pca,
parCompVi = c(1, 2) + 2 * (comp.i - 1),
typeVc = "x-score")
techval_ppclin_pca.ggplot <- ProMetIS:::.preclinical_wt_scoreplot(wt.eset, wt.pca)
print(techval_ppclin_pca.ggplot)
ggplot2::ggsave(paste0("figures/article_data/ImbertEtAl_Fig5B_techval_preclin_pca.pdf"),
techval_ppclin_pca.ggplot,
height = 7, width = 7)
The loadings are displayed below (in particular the 3 features with most extreme values):
for (comp.i in 1:2)
ropls::plot(wt.pca,
parCompVi = c(1, 2) + 2 * (comp.i - 1),
typeVc = "x-loading")
In particular, ALP and body weight have a strong influence on the first dimension (which discriminates M/F; see below).
wt_load.mn <- ropls::getLoadingMN(wt.pca)
for (comp.i in 1:4) {
load.vn <- wt_load.mn[, comp.i]
load_ext.vi <- c(order(load.vn)[1:3], rev(order(load.vn, decreasing = TRUE)[1:3]))
print(wt_load.mn[load_ext.vi, comp.i, drop = FALSE])
}
## p1
## Grip test_Weight (g) -0.1854476
## Rotarod test_Body weight (g) -0.1847274
## Auditory startle reflex reactivity and PPI_Body weight (g) -0.1838232
## Pavlovian fear conditioning_Freezing Cue 2 0.1017683
## Pavlovian fear conditioning_Freezing (%) Cue-2 0.1017685
## Clinical chemistry parameters_ALP (U/l) 0.1570950
## p2
## Open field test_%Time Spent in the Center glob -0.2538433
## Open field test_Permanence Time center (s) -0.2538282
## Open field test_% distance in the Center glob. (cm) -0.2515523
## Open field test_av. speed center (cm/s) 0.1539070
## Open field test_Distance periphery (cm) 0.1886944
## Open field test_Permanence Time periphery (s) 0.2538322
## p3
## Open field test_Resting Time whole arena (s) -0.2013668
## Open field test_Resting Time periphery (s) -0.1922035
## Pavlovian fear conditioning_freezing total context -0.1364169
## Open field test_Dist I2 (cm) 0.2131457
## Open field test_Distance whole arena (cm) 0.2396825
## Open field test_Dist global (cm) 0.2396825
## p4
## Auditory startle reflex reactivity and PPI_Gl -0.2093506
## Auditory startle reflex reactivity and PPI_% PP75 -0.2054430
## Grip test_Grip mean (4paws) (g) -0.1865101
## Auditory startle reflex reactivity and PPI_PP85 0.1765333
## Auditory startle reflex reactivity and PPI_PP90 0.1783836
## Auditory startle reflex reactivity and PPI_PP75 0.1844418
The first axis is related to weight, and Glucose, HDL, Albumin and Alkalyne Phosphatase:
for (feat.c in c("Grip test_Weight (g)",
"GTT_Glucose T120 (mmol/l)",
"Clinical chemistry parameters_HDL Chol. (mmol/l)",
"Clinical chemistry parameters_Albumin (g/l)",
"Clinical chemistry parameters_ALP (U/l)")) {
ropls::plot(wt.pca,
typeVc = "x-score",
parAsColFcVn = Biobase::exprs(wt.eset)[feat.c, ],
parCompVi = c(1, 2),
parLabVc = rep("x", dim(wt.eset)["Samples"]),
parTitleL = FALSE)
title(feat.c)
}
In particular, we observe that mice tend to be separated according to sex along the first axis:
ropls::plot(wt.pca, typeVc = "x-score", parAsColFcVn = Biobase::pData(wt.eset)[, "sex"])
The second axis is related to behavior (permanence time in periphery):
for (feat.c in c("Open field test_Permanence Time periphery (s)")) {
ropls::plot(wt.pca,
typeVc = "x-score",
parAsColFcVn = Biobase::exprs(wt.eset)[feat.c, ],
parCompVi = c(1, 2),
parLabVc = rep("x", dim(wt.eset)["Samples"]),
parTitleL = FALSE)
title(feat.c)
}
The two clusters on the 3-4 hyperplane are related to Haematology (RBC: red blood cell count; HGB: hemoglobin; HCT: hematocrit).
for (feat.c in c("Haematology - Complete blood count (CBC)on the Advia 120_RBC (x10E06 cells/µL)",
"Haematology - Complete blood count (CBC)on the Advia 120_HGB (g/dL)",
"Haematology - Complete blood count (CBC)on the Advia 120_HCT (%)")) {
ropls::plot(wt.pca,
typeVc = "x-score",
parAsColFcVn = Biobase::exprs(wt.eset)[feat.c, ],
parCompVi = c(3, 4),
parLabVc = rep("x", dim(wt.eset)["Samples"]),
parTitleL = FALSE)
title(feat.c)
}
The orthogonal variation is related to behavior (global distance):
for (feat.c in c("Open field test_Dist global (cm)")) {
ropls::plot(wt.pca,
typeVc = "x-score",
parAsColFcVn = Biobase::exprs(wt.eset)[feat.c, ],
parCompVi = c(3, 4),
parLabVc = rep("x", dim(wt.eset)["Samples"]),
parTitleL = FALSE)
title(feat.c)
}
techval_proteo_dir.c <- "../supplementary/technical_validation/proteomics"
# liver
prot_liver_file.c <- file.path(techval_proteo_dir.c,
"Données pour figures Strasbourg QC.xlsx")
# plasma
prot_plasma_file.c <- file.path(techval_proteo_dir.c,
"figureQC_proteomique.xlsx")
# liver
irt_liver.tb <- suppressMessages(readxl::read_excel(prot_liver_file.c,
sheet = 1))
irt_liver.df <- as.data.frame(irt_liver.tb,
stringsAsFactors = FALSE)
irt_liver.mn <- as.matrix(irt_liver.df[-1, -1])
mode(irt_liver.mn) <- "numeric"
rownames(irt_liver.mn) <- irt_liver.df[, 1][-1]
pept_liver.vc <- rownames(irt_liver.mn)
rownames(irt_liver.mn) <- paste0("P", 1:nrow(irt_liver.mn))
# plasma
irt_plasma.tb <- suppressMessages(readxl::read_excel(prot_plasma_file.c,
sheet = 2))
irt_plasma.df <- as.data.frame(irt_plasma.tb,
stringsAsFactors = FALSE)
irt_plasma.mn <- as.matrix(irt_plasma.df[3:13, 21:65])
mode(irt_plasma.mn) <- "numeric"
rownames(irt_plasma.mn) <- irt_plasma.df[, 2][3:13]
pept_plasma.vc <- rownames(irt_plasma.mn)
stopifnot(identical(pept_liver.vc, pept_plasma.vc))
rownames(irt_plasma.mn) <- rownames(irt_liver.mn)
# liver
cv_liver.tb <- suppressMessages(readxl::read_excel(prot_liver_file.c,
sheet = 2,
range = "A1:E2469"))
cv_liver.df <- as.data.frame(cv_liver.tb,
stringsAsFactors = FALSE)
cv_liver.df <- cbind.data.frame(type = factor(rep(c("Pool", "WT", "LAT", "MX2"),
each = nrow(cv_liver.df)),
levels = c("Pool", "WT", "LAT", "MX2")),
value = c(cv_liver.df[, 2], cv_liver.df[, 3],
cv_liver.df[, 4], cv_liver.df[, 5]))
# plasma
cv_plasma.tb <- suppressMessages(readxl::read_excel(prot_plasma_file.c,
sheet = 4,
range = "A1:BT622"))
cv_plasma.df <- as.data.frame(cv_plasma.tb,
stringsAsFactors = FALSE)
cv_plasma.df <- cbind.data.frame(type = factor(rep(c("WT", "MX2", "LAT", "Pool"),
each = nrow(cv_plasma.df)),
levels = c("Pool", "WT", "LAT", "MX2")),
value = c(cv_plasma.df[, 69], cv_plasma.df[, 70],
cv_plasma.df[, 71], cv_plasma.df[, 72]))
irt_liver.gg <- ProMetIS:::.irt_plot(irt_liver.mn, "liver")
irt_plasma.gg <- ProMetIS:::.irt_plot(irt_plasma.mn, "plasma")
cv_liver.gg <- ProMetIS:::.cv_ggplot(cv_liver.df, "liver")
cv_plasma.gg <- ProMetIS:::.cv_ggplot(cv_plasma.df, "plasma")
prot_qc_plot <- gridExtra::grid.arrange(irt_liver.gg,
cv_liver.gg,
irt_plasma.gg,
cv_plasma.gg,
nrow = 2, ncol = 2,
widths = c(4, 2),
heights = c(3, 3))
ggplot2::ggsave("figures/article_data/ImbertEtAl_Fig6_techval_proteo_irt_cv.pdf", prot_qc_plot, width = 12, height = 7)
Loading the processed datasets:
p.mset <- phenomis::reading(ProMetIS::processed_dir.c(),
report.c = "none")
pmetabo.mset <- p.mset[, ProMetIS::metabo_sets.vc()]
Post-processing in the ‘technical validation’ mode (keeping standards, keeping pools, omitting the pool_CV <= 30% and pool_CV_over_sample_CV <= 1 filters)
pmsmetabo.mset <- ProMetIS:::.metabo_postprocessing(metabo.mset = pmetabo.mset,
drift_correct.c = "prometis",
.technical_validation.l = TRUE)
## [1] "metabolomics_liver_c18hypersil_pos"
## [1] "metabolomics_liver_hilic_neg"
## [1] "metabolomics_plasma_c18hypersil_pos"
## [1] "metabolomics_plasma_hilic_neg"
## [1] "metabolomics_plasma_c18acquity_pos"
## [1] "metabolomics_plasma_c18acquity_neg"
# save(pmsmetabo.mset, file = "../supplementary/technical_validation/metabolomics/metabo_mset.rdata")
# load("../supplementary/technical_validation/metabolomics/metabo_mset.rdata")
standards_info.vc <- c("2-Aminoanthracene [ExS]_#0000FF",
"Alanine 13C [ExS]_#0040FF",
"Amiloride [ExS]_#0080FF",
"Aspartate 15N [ExS]_#00BFFF",
"Atropine [ExS]_#00FFFF",
"Colchicine [ExS]_#00FFBF",
"Dihydrostreptomycin [ExS]_#00FF80",
"Ethylmalonic acid [ExS]_#00FF40",
"Glucose 13C [ExS]_#00FF00",
"Imipramine [ExS]_#40FF00",
"Metformin [ExS]_#80FF00",
"Prednisone [ExS]_#BFFF00",
"Roxithromycin (fragment) [ExS]_#FFFF00",
"AMPA [IS]_#FFBF00",
"Dimetridazole [IS]_#FF8000",
"Dinoseb [IS]_#FF4000",
"MCPA [IS]_#FF0000")
standards_type.vc <- sapply(standards_info.vc,
function(info.c)
unlist(strsplit(info.c, "_"))[1])
standards_name.vc <- sapply(standards_type.vc,
function(standard.c)
unlist(strsplit(standard.c, " [", fixed = TRUE))[1])
palette.vc <- sapply(standards_info.vc,
function(info.c)
unlist(strsplit(info.c, "_"))[2])
palette.vc <- c(RColorBrewer::brewer.pal(8, "Dark2"),
RColorBrewer::brewer.pal(9, "Set1")[c(1:5, 7:9)],
RColorBrewer::brewer.pal(3, "Set2")[1])
names(palette.vc) <- standards_name.vc
out_sample.ls <- standards_cv.ls <- list()
for (set.c in grep("(c18hyper|hilic)", names(pmsmetabo.mset), value = TRUE)) {
# set.c <- grep("(c18hyper|hilic)", names(pmsmetabo.mset), value = TRUE)[3]
eset <- pmsmetabo.mset[[set.c]]
eset <- eset[Biobase::fData(eset)[, "standard"] != "",
Biobase::pData(eset)[, "sampleType"] %in% c("pool", "sample")]
#ggplot needs a dataframe
exprs.mn <- Biobase::exprs(eset)
tlog2exprs.df <- as.data.frame(t(log2(1 + exprs.mn)))
colnames(tlog2exprs.df) <- Biobase::fData(eset)[, "standard"]
# re-ordering
tlog2exprs.df <- tlog2exprs.df[, standards_type.vc[standards_type.vc %in% colnames(tlog2exprs.df)]]
# computing CVs
standards_cv.ls[[set.c]] <- apply(as.matrix(tlog2exprs.df), 2, function(x) sd(x) / mean(x))
#id variable for position in matrix
tlog2exprs.df$id <- 1:nrow(tlog2exprs.df)
#reshape to long format
ggplot.df <- reshape2::melt(tlog2exprs.df, id.var = "id")
ggplot.df[, "sample"] <- factor(rep(gsub("pool1", "pool",
Biobase::pData(eset)[, "sampleType"]),
ncol(tlog2exprs.df) - 1),
levels = c("pool",
"sample"))
ggplot.df[, "sample_name"] <- rep(Biobase::sampleNames(eset),
ncol(tlog2exprs.df) - 1)
ggplot.df[, "compound_standard"] <- ggplot.df[, "variable"]
ggplot.df[, "compound"] <- vapply(as.character(ggplot.df[, "compound_standard"]),
function(comp.c) {
bracket.i <- which(unlist(strsplit(comp.c, split = "")) == "[")
substr(comp.c, 1, bracket.i - 2)
}, character(1))
ggplot.df[, "standard"] <- vapply(as.character(ggplot.df[, "compound_standard"]),
function(comp.c) {
bracket.i <- which(unlist(strsplit(comp.c, split = "")) == "[")
substr(comp.c, bracket.i, nchar(comp.c))
}, character(1))
ggplot.df[, "injectionOrder"] <- ggplot.df[, "id"]
ggplot.df[, "intensity"] <- ggplot.df[, "value"]
ggplot.df[, "mean"] <- rep(tapply(ggplot.df[, "intensity"], ggplot.df[, "variable"], mean), table(ggplot.df[, "variable"]))
ggplot.df[, "sd"] <- rep(tapply(ggplot.df[, "intensity"], ggplot.df[, "variable"], sd), table(ggplot.df[, "variable"]))
ggplot.df[, "lcl"] <- ggplot.df[, "mean"] - 2 * ggplot.df[, "sd"]
ggplot.df[, "ucl"] <- ggplot.df[, "mean"] + 2 * ggplot.df[, "sd"]
ggplot.df[, "zscore"] <- (ggplot.df[, "intensity"] - ggplot.df[, "mean"]) / ggplot.df[, "sd"]
ggplot.df[, "pval"] <- format(2 * (1 - pnorm(abs(ggplot.df[, "zscore"]))), scientific = TRUE)
out_sample.vl <- abs(ggplot.df[, "zscore"]) > 3
if (sum(out_sample.vl)) {
out_sample.ls[[set.c]] <- ggplot.df[out_sample.vl, c("sample_name",
"compound_standard",
"injectionOrder",
"intensity",
"lcl",
"ucl",
"zscore",
"pval"), drop = FALSE]
} else
out_sample.ls[[set.c]] <- data.frame()
#plot
standards.ggplot <- ggplot2::ggplot(ggplot.df,
ggplot2::aes(x = injectionOrder,
y = intensity,
group = compound,
colour = compound)) +
ggplot2::geom_point(ggplot2::aes(shape = sample)) +
ggplot2::scale_shape_manual(values = c(5, 18)) +
ggplot2::geom_line(ggplot2::aes(lty = standard), size = 0.5) +
ggplot2::scale_linetype_manual(values = c("solid", "dotted"))+
# ggplot2::geom_line(ggplot2::aes(x = injectionOrder, y = mean, colour = compound), lty = "solid", size = 0.5) +
ggplot2::geom_ribbon(ggplot2::aes(x = injectionOrder, ymin = lcl, ymax = ucl, group = compound,
fill = compound), linetype = "dashed", alpha = 0.1) +
ggplot2::labs(title = gsub("metabolomics_", "", set.c),
x = "Injection order", y = "Intensity (log2)") +
ggplot2::scale_color_manual(values = palette.vc[unique(ggplot.df[, "compound"])]) +
ggplot2::scale_fill_manual(values = palette.vc[unique(ggplot.df[, "compound"])]) +
# ggplot2::scale_color_brewer(palette = "Set3") +
# ggplot2::scale_fill_brewer(palette = "Set3") +
ggplot2::coord_cartesian(ylim = c(13, 28)) +
ggplot2::theme_light() +
ggplot2::theme(plot.title = ggplot2::element_text(size = 20, face = "bold"),
axis.text = ggplot2::element_text(size = 17),
axis.title = ggplot2::element_text(size = 17, face = "bold"),
legend.title = ggplot2::element_text(size = 17, face = "bold"),
legend.text = ggplot2::element_text(size = 17))
# ggplot2::geom_rug(sides = "b", mapping = ggplot2::aes(group = sample))
ggplot2::ggsave(paste0("figures/article_data/ImbertEtAl_FigS5_metabo_standards_",
gsub("metabolomics_", "", set.c), ".pdf"),
standards.ggplot, width = 12)
}
print(round(c(min = min(unlist(standards_cv.ls)),
mean = mean(unlist(standards_cv.ls)),
max = max(unlist(standards_cv.ls)),
sd = sd(unlist(standards_cv.ls))) * 100))
## min mean max sd
## 0 2 5 1
print(out_sample.ls)
## $metabolomics_liver_c18hypersil_pos
## sample_name compound_standard injectionOrder intensity
## 60 X20180906_827F_pos_021 Alanine 13C [ExS] 8 13.44732
## 178 X20180906_818F_pos_035 Aspartate 15N [ExS] 22 21.13316
## 306 X20180906_627F_pos_060 Imipramine [ExS] 46 24.30825
## 358 X20180906_627F_pos_060 Prednisone [ExS] 46 21.59884
## lcl ucl zscore pval
## 60 14.61297 17.75892 -3.482105 4.974895e-04
## 178 19.49298 20.69972 3.436734 5.887746e-04
## 306 24.57520 25.34977 -3.378586 7.285958e-04
## 358 22.21808 23.29546 -4.299067 1.715190e-05
##
## $metabolomics_liver_hilic_neg
## sample_name compound_standard injectionOrder intensity lcl
## 38 X20180910_629F_neg_061 Amiloride [ExS] 38 15.10089 17.56935
## 48 X20180910_QC.EI_neg_071 Amiloride [ExS] 48 15.19781 17.56935
## 189 X20180910_634F_neg_056 Glucose 13C [ExS] 33 14.64951 17.47429
## ucl zscore pval
## 38 21.39874 -4.578437 4.684638e-06
## 48 21.39874 -4.477195 7.563028e-06
## 189 20.95346 -5.247646 1.540546e-07
##
## $metabolomics_plasma_c18hypersil_pos
## sample_name compound_standard injectionOrder intensity
## 126 X20180906_818P_pos_035 Aspartate 15N [ExS] 22 19.72965
## 210 X20180906_617P_pos_015 Colchicine [ExS] 2 21.59074
## 262 X20180906_617P_pos_015 Dihydrostreptomycin [ExS] 2 22.40049
## 366 X20180906_617P_pos_015 Imipramine [ExS] 2 24.40131
## 542 X20180906_818P_pos_035 Dimetridazole [IS] 22 26.20310
## 552 X20180906_836P_pos_046 Dimetridazole [IS] 32 26.25760
## lcl ucl zscore pval
## 126 18.49164 19.38674 3.532401 4.118048e-04
## 210 21.98452 23.04404 -3.486650 4.891107e-04
## 262 22.62489 23.43031 -3.114410 1.843131e-03
## 366 24.71776 25.80387 -3.165454 1.548410e-03
## 542 26.40534 26.99094 -3.381358 7.212841e-04
## 552 26.40534 26.99094 -3.009107 2.620172e-03
##
## $metabolomics_plasma_hilic_neg
## sample_name compound_standard injectionOrder intensity lcl
## 4 X20180910_513P_neg_027 Amiloride [ExS] 4 15.28016 16.82327
## 269 X20180910_631P_neg_032 Dinoseb [IS] 9 27.25426 27.48381
## 340 X20180910_527P_neg_051 MCPA [IS] 28 25.41963 25.71633
## ucl zscore pval
## 4 20.37539 -3.737676 1.857294e-04
## 269 28.02777 -3.687924 2.260913e-04
## 340 26.32446 -3.951528 7.765389e-05
print(max(abs(Reduce("rbind.data.frame", out_sample.ls)[, "zscore"])))
## [1] 5.247646
sapply(out_sample.ls, nrow)
## metabolomics_liver_c18hypersil_pos metabolomics_liver_hilic_neg
## 4 3
## metabolomics_plasma_c18hypersil_pos metabolomics_plasma_hilic_neg
## 6 3
for (set.c in names(out_sample.ls)) {
eset <- pmsmetabo.mset[[set.c]]
eset.pca <- ropls::opls(eset, fig.pdfC = "none", info.txtC = "none", predI = 4)
ropls::plot(eset.pca,
typeVc = "x-score",
parAsColFcVn = ifelse(Biobase::sampleNames(eset) %in% out_sample.ls[[set.c]][, "sample_name"], 1, 0),
parTitleL = FALSE)
title(gsub("metabolomics_", "", set.c))
}
quality_metrics.ls <- ProMetIS:::.metabo_quality_metrics(pmsmetabo.mset)
# save(quality_metrics.ls, file = "../supplementary/technical_validation/metabolomics/quality_metrics_ls.rdata")
# load("../supplementary/technical_validation/metabolomics/quality_metrics_ls.rdata")
quality_table.ls <- list()
for (method.c in c("none", "pool", "sample", "prometis")) {
load(paste0("../supplementary/technical_validation/metabolomics/quality_metrics_ls",
ifelse(method.c != "prometis", paste0("_", method.c), ""),
".rdata"))
quality.mn <- quality_metrics.ls[["quality.mn"]]
quality.mn <- t(quality.mn)[, c("correl_inj_test",
"correl_inj_pca",
"pool_spread_pca",
"poolCV_0.3",
"ICCpool")]
colnames(quality.mn) <- c("Drift Spearman", "Drift PCA", "QC spread", "QC CV", "QC ICC")
for (metric.c in c("Drift Spearman", "Drift PCA", "QC spread"))
quality.mn[, metric.c] <- 100 - quality.mn[, metric.c]
quality.mn <- round(quality.mn)
quality_table.ls[[method.c]] <- quality.mn
}
min_quality.mn <- t(sapply(quality_table.ls, function(matrix.mn) {
c(drift = min(matrix.mn[, 1:2]), qc = min(matrix.mn[, 3:5]))
}))
rownames(min_quality.mn) <- c("None", "Pools", "Samples", "ProMetIS")
plot(min_quality.mn, type = "n", xlim = c(0, 100), ylim = c(0, 100),
xlab = "Drift metrics", ylab = "QC metrics",
main = "Minimum quality metric values\ndepending on the correction method")
text(min_quality.mn, labels = rownames(min_quality.mn))
write.table(quality_table.ls[["prometis"]],
col.names = NA,
file = "figures/article_data/ImbertEtAl_Table1_techval_metabo_metrics.tsv",
sep = "\t")
rowMeans(quality_table.ls[["prometis"]])
## liver_c18hypersil_pos liver_hilic_neg plasma_c18hypersil_pos
## 85.2 89.0 92.2
## plasma_hilic_neg plasma_c18acquity_pos plasma_c18acquity_neg
## 93.8 83.8 88.2
quality_ggparcoord.ls <- quality_ggradar.ls <- list()
for (method.c in c("none", "pool", "sample", "prometis")) {
quality.df <- as.data.frame(quality_table.ls[[method.c]])
quality.df <- cbind.data.frame(group = factor(rownames(quality.df),
levels = rownames(quality.df)),
quality.df)
quality_ggradar.ls[[method.c]] <- ggradar::ggradar(quality.df,
base.size = 15,
values.radar = c("0", "50", "100"),
grid.min = 0, grid.mid = 50, grid.max = 100,
# Polygones
group.line.width = 2,
group.point.size = 3,
group.colours = RColorBrewer::brewer.pal(9, "Set1")[c(1:5, 7)],
# Background
background.circle.colour = "white",
gridline.mid.colour = "grey",
# Legend
plot.title = method.c,
legend.text.size = 15,
legend.position = "right")
quality_ggparcoord.ls[[method.c]] <- GGally::ggparcoord(quality.df, columns = c(3, 2, 4:6),
groupColumn = 1,
scale = "globalminmax") +
ggplot2::geom_line(size = 2) +
ggplot2::labs(title = "",
x = "", y = "Score") +
ggplot2::scale_color_manual(values = RColorBrewer::brewer.pal(9, "Set1")[c(1:5, 7)]) +
ggplot2::coord_cartesian(ylim = c(0, 100)) +
ggplot2::theme_light() +
ggplot2::theme(plot.title = ggplot2::element_text(size = 20, face = "bold"),
axis.text = ggplot2::element_text(size = 17),
axis.title = ggplot2::element_text(size = 17, face = "bold"),
legend.position = "bottom",
legend.title = ggplot2::element_blank(),
legend.text = ggplot2::element_text(size = 17))
}
ggplot2::ggsave("figures/article_data/ImbertEtAl_Fig10A_techval_metabo_radar.pdf",
quality_ggradar.ls[["prometis"]],
height = 7, width = 10.5)
ggplot2::ggsave("figures/article_data/ImbertEtAl_Fig10Abis_techval_metabo_parallel.pdf",
quality_ggparcoord.ls[["prometis"]],
height = 7, width = 10.5)
for (method.c in setdiff(names(quality_ggradar.ls), "prometis")) {
ggplot2::ggsave(paste0("figures/article_data/ImbertEtAl_FigS6_techval_metabo_radar_", method.c, ".pdf"),
quality_ggradar.ls[[method.c]],
height = 7, width = 10.5)
ggplot2::ggsave(paste0("../supplementary/technical_validation/metabolomics/ImbertEtAl_FigS6bis_techval_metabo_parallel_", method.c, ".pdf"),
quality_ggparcoord.ls[[method.c]],
height = 7, width = 10.5)
}
scoreplot.ls <- vector(mode = "list", length = length(ProMetIS::metabo_sets.vc()))
names(scoreplot.ls) <- ProMetIS::metabo_sets.vc()
for (set.c in names(pmsmetabo.mset)) {
pmsmetabo.eset <- pmsmetabo.mset[[set.c]]
pmsmetabo.eset <- pmsmetabo.eset[, Biobase::pData(pmsmetabo.eset)[, "sampleType"] %in% c("pool", "sample")]
pdata.df <- Biobase::pData(pmsmetabo.eset)
pdata.df[, "order"] <- pdata.df[, "injectionOrder"]
if (grepl("acqui", set.c))
pdata.df[, "order"] <- pdata.df[, "order"] - min(pdata.df[, "order"]) + 1
pdata.df[, "label"] <- ifelse(pdata.df[, "sampleType"] == "pool", "QC", "s")
Biobase::pData(pmsmetabo.eset) <- pdata.df
pmsmetabo.pca <- ropls::opls(pmsmetabo.eset, predI = 2, fig.pdfC = "none", info.txtC = "none")
scoreplot.ls[[set.c]] <- ProMetIS:::score_plotly(pmsmetabo.pca,
label.c = "label",
color.c = "order",
title.c = gsub("metabolomics_", "", set.c),
figure.c = "interactive",
size.ls = list(axis_lab.i = 12,
axis_text.i = 10,
point.i = 2,
label.i = 4,
title.i = 15,
legend_title.i = 10,
legend_text.i = 10),
plot.l = FALSE)
}
scoreplots.p <- gridExtra::grid.arrange(grobs = scoreplot.ls[c("metabolomics_liver_c18hypersil_pos",
"metabolomics_plasma_c18hypersil_pos",
"metabolomics_plasma_c18acquity_pos",
"metabolomics_liver_hilic_neg",
"metabolomics_plasma_hilic_neg",
"metabolomics_plasma_c18acquity_neg")],
nrow = 2)
ggplot2::ggsave(paste0("figures/article_data/ImbertEtAl_Fig7B_techval_metabo_pca.pdf"), scoreplots.p,
height = 7, width = 10.5)
cv_ggplot.ls <- vector(mode = "list", length = length(names(pmsmetabo.mset)))
names(cv_ggplot.ls) <- names(pmsmetabo.mset)
for (set.c in names(pmsmetabo.mset)) {
# set.c <- names(pmsmetabo.mset)[2]
pmsmetabo.eset <- pmsmetabo.mset[[set.c]]
pmsmetabo.eset <- pmsmetabo.eset[, Biobase::pData(pmsmetabo.eset)[, "sampleType"] %in% c("pool", "sample")]
genotype.vc <- tolower(substr(Biobase::pData(pmsmetabo.eset)[,
ifelse(grepl("(hyper|hilic)", set.c),
"KO", "Type")], 1, 1))
type.vc <- vapply(seq_len(Biobase::dims(pmsmetabo.eset)["Samples", 1]),
function(sample.i) {
if (Biobase::pData(pmsmetabo.eset)[sample.i, "sampleType"] == "pool") {
return("Pool")
} else if (genotype.vc[sample.i] == "l") {
return("LAT")
} else if (genotype.vc[sample.i] == "m") {
return("MX2")
} else {
return("WT")
}
}, FUN.VALUE = character(1))
cv.mn <- 100 * as.matrix(t(apply(Biobase::exprs(pmsmetabo.eset), 1,
function(feat.vn) {
tapply(feat.vn, type.vc,
function(x) sd(x, na.rm = TRUE) / mean(x, na.rm = TRUE))
})))
cv.df <- cbind.data.frame(type = factor(rep(c("Pool", "WT", "LAT", "MX2"),
each = nrow(cv.mn)),
levels = c("Pool", "WT", "LAT", "MX2")),
value = c(cv.mn[, "Pool"], cv.mn[, "WT"],
cv.mn[, "LAT"], cv.mn[, "MX2"]))
cv_ggplot.ls[[set.c]] <- ProMetIS:::.cv_ggplot(cv.df, title.c = gsub("metabolomics_", "", set.c))
}
metabo_cv_plot <- gridExtra::marrangeGrob(cv_ggplot.ls,
nrow = 2, ncol = 3)
ggplot2::ggsave(paste0("figures/article_data/ImbertEtAl_FigS9_metabo_cv_boxplots.pdf"), metabo_cv_plot,
height = 7, width = 10.5)
pdf("figures/article_data/ImbertEtAl_FigS7_metabo_cv_cumulative.pdf",
height = 7, width = 10.5)
ProMetIS:::.metabo_quality_plot(quality_metrics.ls = quality_metrics.ls, "cv_percent")
## [1] "liver_c18hypersil_pos" "liver_hilic_neg" "plasma_c18hypersil_pos"
## [4] "plasma_hilic_neg" "plasma_c18acquity_pos" "plasma_c18acquity_neg"
dev.off()
## png
## 2
pdf("figures/article_data/ImbertEtAl_FigS8_metabo_icc_cumulative.pdf",
height = 7, width = 10.5)
ProMetIS:::.metabo_quality_plot(quality_metrics.ls = quality_metrics.ls, "icc_intens")
## [1] "liver_c18hypersil_pos" "liver_hilic_neg" "plasma_c18hypersil_pos"
## [4] "plasma_hilic_neg" "plasma_c18acquity_pos" "plasma_c18acquity_neg"
dev.off()
## png
## 2
ppstat.mset <- phenomis::hypotesting(pp.mset,
test.c = "limma",
factor_names.vc = "sex",
figure.c = "none",
report.c = "none")
message("Percentage of features with significant differences between males and females:")
ppstat.sex.vn <- sapply(Biobase::fData(ppstat.mset),
function(fdata.df) {
round(sum(fdata.df[, "limma_sex_F.M_signif"], na.rm = TRUE) / nrow(fdata.df) * 100)
})
ppstat.sex.mn <- as.matrix(ppstat.sex.vn)
colnames(ppstat.sex.mn) <- "%"
print(ppstat.sex.mn)
## %
## preclinical 34
## proteomics_liver 40
## proteomics_plasma 30
## metabolomics_liver_c18hypersil_pos 14
## metabolomics_liver_hilic_neg 14
## metabolomics_plasma_c18hypersil_pos 20
## metabolomics_plasma_hilic_neg 24
## metabolomics_plasma_c18acquity_pos 37
## metabolomics_plasma_c18acquity_neg 37
message("Range: ", paste(range(ppstat.sex.vn), collapse = ", "))
int_range_ggplot.ls <- list()
for (set.c in setdiff(names(pp.mset), "preclinical")) {
# set.c <- "proteomics_liver"
pp.eset <- pp.mset[[set.c]]
pp_exprs.mn <- Biobase::exprs(pp.eset)
pp_pdata.df <- Biobase::pData(pp.eset)
pp_exprs.tb <- tidyr::as_tibble(pp_exprs.mn)
int_range.df <- as.data.frame(tidyr::pivot_longer(pp_exprs.tb,
cols = 1:ncol(pp_exprs.tb),
names_to = "sample",
values_to = "intensity"))
int_range.df[, "gene"] <- factor(pp_pdata.df[int_range.df[, "sample"], "gene"],
levels = c("WT", "LAT", "MX2"))
int_range.df[, "sex"] <- factor(pp_pdata.df[int_range.df[, "sample"], "sex"],
levels = c("M", "F"))
int_samp_order.vc <- rownames(pp_pdata.df[order(factor(pp_pdata.df[, "gene"], levels = c("WT", "LAT", "MX2"))), ])
int_range.df[, "sample"] <- factor(int_range.df[, "sample"],
levels = int_samp_order.vc)
p <- ggplot2::ggplot(int_range.df,
ggplot2::aes(x = sample, y = intensity, fill = sex, col = gene)) +
ggplot2::geom_boxplot(lwd = 1, outlier.size = 1) +
ggplot2::scale_color_manual(values = RColorBrewer::brewer.pal(9, "Set1")[-1]) +
ggplot2::scale_fill_manual(values = c(M = "lightblue2",
F = "lightpink1")) +
ggplot2::labs(title = set.c, x = "Samples", y = "Intensity (log2)") +
ggplot2::theme_light() +
ggplot2::theme(legend.text = ggplot2::element_text(size = 11, face = "bold"),
legend.title = ggplot2::element_blank(),
axis.text = ggplot2::element_text(size = 11, face = "bold"),
axis.text.x = ggplot2::element_text(size = 11, angle = 90),
axis.title = ggplot2::element_text(size = 13, face = "bold"),
plot.title = ggplot2::element_text(size = 15, face = "bold"))
int_range_ggplot.ls[[set.c]] <- p
}
int_range.gg <- gridExtra::grid.arrange(grobs = int_range_ggplot.ls,
nrow = length(ProMetIS::sets.vc()) - 1, ncol = 2)
ggplot2::ggsave("figures/article_data/ImbertEtAl_FigS10_intensity_range.pdf", int_range.gg,
width = 14, height = 35)
Comparing the CV values from the raw protein intensities provided by the partners and used in Figure 6 and from the processed data in the ProMetIS package:
cv_compare_gg.ls <- vector("list", length = 2)
names(cv_compare_gg.ls) <- ProMetIS::tissues.vc()
for (tissue.c in ProMetIS::tissues.vc()) {
# tissue.c <- "liver"
# cv_liver.df and cv_plasma.df tables provided by the partners and read above (line 451 and following)
cv_partner.df <- eval(parse(text = paste0("cv_", tissue.c, ".df")))
cv_compare_gg.ls[[tissue.c]][["partner"]] <- ProMetIS:::.cv_ggplot(cv_partner.df, "liver [partner]")
# tables obtained from the processed datasets loaded above (line 550 and following)
eset <- pp.mset[[paste0("proteomics_", tissue.c)]]
gene.fc <- factor(Biobase::pData(eset)[, "gene"],
levels = c("WT", "LAT", "MX2"))
cv_processed.mn <- t(apply(2^Biobase::exprs(eset), 1,
function(feat.vn) {
tapply(feat.vn, gene.fc,
function(x) sd(x, na.rm = TRUE) / mean(x, na.rm = TRUE))
})) * 100
cv_processed.df <- tidyr::gather(as.data.frame(cv_processed.mn),
key = "gene", value = "cv")
cv_compare_gg.ls[[tissue.c]][["processed"]] <- ProMetIS:::.cv_ggplot(cv_processed.df, "liver [processed]",
color.vc = RColorBrewer::brewer.pal(9, "Set1")[-1])
}
cv_compare_liver.gg <- gridExtra::grid.arrange(grobs = cv_compare_gg.ls[["liver"]],
nrow = 1, ncol = 2)
cv_compare_plasma.gg <- gridExtra::grid.arrange(grobs = cv_compare_gg.ls[["plasma"]],
nrow = 1, ncol = 2)
Plotting the male and female CV separately for each conditions, as computed on the processed data:
ppcv_ggplot.ls <- list()
for (set.c in names(pp.mset)) {
pp.eset <- pp.mset[[set.c]]
ppexprs.mn <- Biobase::exprs(pp.eset)
pp_genesex.vc <- paste0(Biobase::pData(pp.eset)[, "gene"], "_",
Biobase::pData(pp.eset)[, "sex"])
ppcv.mn <- t(apply(ppexprs.mn, 1,
function(feat.vn) {
tapply(feat.vn, pp_genesex.vc, function(x) sd(x, na.rm = TRUE) / mean(x, na.rm = TRUE) * 100)
}))
ppcv.df <- as.data.frame(tidyr::pivot_longer(tidyr::as_tibble(ppcv.mn),
cols = 1:ncol(ppcv.mn),
names_to = "features",
values_to = "CV"))
ppcv.df[, "gene"] <- factor(sapply(ppcv.df[, "features"],
function(feat.c)
unlist(strsplit(feat.c, "_"))[1]),
levels = c("WT", "LAT", "MX2"))
ppcv.df[, "sex"] <- factor(sapply(ppcv.df[, "features"],
function(feat.c)
unlist(strsplit(feat.c, "_"))[2]),
levels = c("M", "F"))
ppcv_ggplot.ls[[set.c]] <- ggplot2::ggplot(ppcv.df, ggplot2::aes(x = gene, y = CV, col = gene, fill = sex)) +
ggplot2::geom_boxplot(lwd = 1, outlier.size = 1) +
ggplot2::scale_color_manual(values = RColorBrewer::brewer.pal(9, "Set1")[-1]) +
ggplot2::scale_fill_manual(values = c(M = "lightblue2",
F = "lightpink1")) +
ggplot2::labs(title = set.c, x = "", y = "CV (%)") +
ggplot2::theme_light() +
ggplot2::theme(legend.text = ggplot2::element_text(size = 11, face = "bold"),
legend.title = ggplot2::element_blank(),
axis.title = ggplot2::element_text(size = 11, face = "bold"),
axis.text = ggplot2::element_text(size = 11, face = "bold"),
plot.title = ggplot2::element_text(size = 15, face = "bold"))
# print(ppcv_ggplot.ls[[set.c]])
}
ppcv_ggplot.vc <- names(ppcv_ggplot.ls)
ppcv_ggplot.ls[["void"]] <- ggplot2::ggplot() + ggplot2::theme_void()
ppcv_ggplot.ls <- ppcv_ggplot.ls[c(ppcv_ggplot.vc[1],
"void",
ppcv_ggplot.vc[-1])]
ppcv_ggplot.gg <- gridExtra::grid.arrange(grobs = ppcv_ggplot.ls,
nrow = ceiling(length(ProMetIS::sets.vc()) / 2), ncol = 2)
# plot(cvplot.gg)
ggplot2::ggsave("figures/article_data/ImbertEtAl_FigS11_cv_sex.pdf", ppcv_ggplot.gg,
width = 14, height = 27)
imputed_mi.ls <- sapply(ProMetIS::proteo_sets.vc(),
function(proteo_set.c) {
ProMetIS:::.imputation_info(eset = pp.mset[[proteo_set.c]],
set.c = proteo_set.c)
})
ppcvimp_ggplot.ls <- list()
for (set.c in grep("proteomics", names(pp.mset), value = TRUE)) {
pp.eset <- pp.mset[[set.c]]
ppexprs.mn <- Biobase::exprs(pp.eset)
pp_genesex.vc <- paste0(Biobase::pData(pp.eset)[, "gene"], "_",
Biobase::pData(pp.eset)[, "sex"])
imputed.ml <- imputed.mi <- imputed_mi.ls[[set.c]]
mode(imputed.ml) <- "logical"
for (imputed.l in c(TRUE, FALSE)) {
ppexprsimp.mn <- ppexprs.mn
if (!imputed.l) {
ppexprsimp.mn[imputed.ml] <- NA_real_
}
cat("\n\n", set.c, ", ", imputed.l, ":", sep = "")
cat("\nsum of values: ", sum(ppexprsimp.mn, na.rm = TRUE), sep = "")
print(summary(c(ppexprsimp.mn)))
if (imputed.l) {
cat("\nnumber of imputated values: ", sum(imputed.mi),
" (", round(sum(imputed.mi) / cumprod(dim(imputed.mi))[2] * 100), "%)", sep = "")
print(summary(c(ppexprsimp.mn[imputed.ml])))
}
ppcvimp.mn <- t(apply(ppexprsimp.mn, 1,
function(feat.vn) {
tapply(feat.vn, pp_genesex.vc, function(x) sd(x, na.rm = TRUE) / mean(x, na.rm = TRUE) * 100)
}))
ppcvimp.df <- as.data.frame(tidyr::pivot_longer(tidyr::as_tibble(ppcvimp.mn),
cols = 1:ncol(ppcvimp.mn),
names_to = "features",
values_to = "CV"))
ppcvimp.df[, "gene"] <- factor(sapply(ppcvimp.df[, "features"],
function(feat.c)
unlist(strsplit(feat.c, "_"))[1]),
levels = c("WT", "LAT", "MX2"))
ppcvimp.df[, "sex"] <- factor(sapply(ppcvimp.df[, "features"],
function(feat.c)
unlist(strsplit(feat.c, "_"))[2]),
levels = c("M", "F"))
p <- ggplot2::ggplot(ppcvimp.df, ggplot2::aes(x = gene, y = CV, col = gene, fill = sex)) +
ggplot2::geom_boxplot(lwd = 1, outlier.size = 1) +
ggplot2::scale_color_manual(values = RColorBrewer::brewer.pal(9, "Set1")[-1]) +
ggplot2::scale_fill_manual(values = c(M = "lightblue2",
F = "lightpink1")) +
ggplot2::labs(title = paste0(set.c, " [", ifelse(imputed.l, "with", "without"), " imputation]"),
x = "", y = "CV (%)") +
ggplot2::theme_light() +
ggplot2::theme(legend.text = ggplot2::element_text(size = 11, face = "bold"),
legend.title = ggplot2::element_blank(),
axis.title = ggplot2::element_text(size = 11, face = "bold"),
axis.text = ggplot2::element_text(size = 11, face = "bold"),
plot.title = ggplot2::element_text(size = 15, face = "bold"))
# print(p)
ppcvimp_ggplot.ls[[paste0(set.c, "_", ifelse(imputed.l, "with", "without"), "_imputation]")]] <- p
}
}
##
##
## proteomics_liver, TRUE:
## sum of values: 2126408 Min. 1st Qu. Median Mean 3rd Qu. Max.
## 14.38 20.79 23.11 23.15 25.43 33.49
##
## number of imputated values: 2598 (3%) Min. 1st Qu. Median Mean 3rd Qu. Max.
## 14.62 18.32 19.30 19.11 19.84 24.01
##
##
## proteomics_liver, FALSE:
## sum of values: 2076749 Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 14.38 21.04 23.22 23.27 25.51 33.49 2598
##
##
## proteomics_plasma, TRUE:
## sum of values: 360250 Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15.03 19.50 21.80 22.44 24.91 35.37
##
## number of imputated values: 797 (5%) Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15.03 17.67 18.21 18.34 18.85 27.63
##
##
## proteomics_plasma, FALSE:
## sum of values: 345629.1 Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 15.35 19.81 22.04 22.65 25.12 35.37 797
ppcvimp_ggplot.gg <- gridExtra::grid.arrange(grobs = ppcvimp_ggplot.ls,
nrow = 2, ncol = 2)
# plot(cvplot.gg)
ggplot2::ggsave("figures/article_data/ImbertEtAl_FigS12_cv_imp.pdf", ppcvimp_ggplot.gg,
width = 14, height = 14)
for (set.c in names(pp.mset)) {
# set.c <- names(pp.mset)[8]
pp.eset <- pp.mset[[set.c]]
if (grepl("metabolomics", set.c))
pp.eset <- pp.eset[Biobase::fData(pp.eset)[, "name"] != "", ]
if (which(names(pp.mset) == set.c) < 3) {
xlsx::write.xlsx(cbind.data.frame(Biobase::fData(pp.eset),
Biobase::exprs(pp.eset)),
file = "figures/article_data/ImbertEtAl_2021_supp_table_part1.xlsx",
sheet = gsub("proteomics", "proteo",
gsub("metabolomics", "metabo", set.c)),
append = which(names(pp.mset) == set.c) > 1)
} else if (which(names(pp.mset) == set.c) < 5) {
xlsx::write.xlsx(cbind.data.frame(Biobase::fData(pp.eset),
Biobase::exprs(pp.eset)),
file = "figures/article_data/ImbertEtAl_2021_supp_table_part2.xlsx",
sheet = gsub("proteomics", "proteo",
gsub("metabolomics", "metabo", set.c)),
append = which(names(pp.mset) == set.c) > 3)
} else if (which(names(pp.mset) == set.c) < 7) {
xlsx::write.xlsx(cbind.data.frame(Biobase::fData(pp.eset),
Biobase::exprs(pp.eset)),
file = "figures/article_data/ImbertEtAl_2021_supp_table_part3.xlsx",
sheet = gsub("proteomics", "proteo",
gsub("metabolomics", "metabo", set.c)),
append = which(names(pp.mset) == set.c) > 5)
} else if (which(names(pp.mset) == set.c) < 9) {
xlsx::write.xlsx(cbind.data.frame(Biobase::fData(pp.eset),
Biobase::exprs(pp.eset)),
file = "figures/article_data/ImbertEtAl_2021_supp_table_part4.xlsx",
sheet = gsub("proteomics", "proteo",
gsub("metabolomics", "metabo", set.c)),
append = which(names(pp.mset) == set.c) > 7)
} else {
xlsx::write.xlsx(cbind.data.frame(Biobase::fData(pp.eset),
Biobase::exprs(pp.eset)),
file = "figures/article_data/ImbertEtAl_2021_supp_table_part5.xlsx",
sheet = gsub("proteomics", "proteo",
gsub("metabolomics", "metabo", set.c)),
append = which(names(pp.mset) == set.c) > 9)
}
}